Here is the report of the network analysis of the crop health data.

loading the library

library(dplyr)
library(plyr)
library(reshape)
library(reshape2)
library(gridExtra)
library(lubridate)
library(doBy)
library(cluster)
library(ggplot2)
library(scales)
library(bioDist)
library(vegan)
library(mvtnorm)
library(igraph)
library(WGCNA)
## ==========================================================================
## *
## *  Package WGCNA 1.47 loaded.
## *
## *    Important note: It appears that your system supports multi-threading,
## *    but it is not enabled within WGCNA in R. 
## *    To allow multi-threading within WGCNA with all available cores, use 
## *
## *          allowWGCNAThreads()
## *
## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
## *    Alternatively, set the following environment variable on your system:
## *
## *          ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## *    for example 
## *
## *          ALLOW_WGCNA_THREADS=4
## *
## *    To set the environment variable in linux bash shell, type 
## *
## *           export ALLOW_WGCNA_THREADS=4
## *
## *     before running R. Other operating systems or shells will
## *     have a similar command to achieve the same aim.
## *
## ==========================================================================
library(cowplot)
library(DiffCorr)
library(pheatmap)

Load the survey data

library(RCurl) # run this package for load the data form the website 

file <- getURL("https://docs.google.com/spreadsheets/d/1zB7gNdI7Nk7SuHuPWcjzaKnjuwkvL6sOVMo0zMfuV-c/pub?gid=558862364&single=true&output=csv") # load data from the google drive

data <- read.csv(text = file) # read data which is formated as the csv

clean the data

data[data == "-"] <- NA  # replace '-' with NA

data[data == ""] <- NA  # replace 'missing data' with NA

# ==== to lower variable names ====
names(data) <- tolower(names(data))  # for more consistancy

data <- data[, !names(data) %in% c("phase", "identifier", "village", "year", "season", "lat", "long", 
                                   "fa", "fn", "fp", "lfm", "ced", "cedjul", "hd", "hdjul", "cvr", 
                                   "varcoded", "fymcoded", "mu", "nplsqm", "rbpx")
             ]

corract the variables

#==== corract the variable type =====
data <- transform(data, country = as.factor(country), pc = as.factor(pc), 
    cem = as.factor(cem), ast = as.factor(ast), ccd = as.numeric(ccd), 
    vartype = as.factor(vartype), fym = as.character(fym), n = as.numeric(n), 
    p = as.numeric(p), k = as.numeric(k), mf = as.numeric(mf), wcp = as.factor(wcp), 
    iu = as.numeric(iu), hu = as.numeric(hu), fu = as.numeric(fu), cs = as.factor(cs), 
    ldg = as.numeric(ldg), yield = as.numeric(yield), dscum = as.factor(dscum), 
    wecum = as.factor(wecum), ntmax = as.numeric(ntmax), npmax = as.numeric(npmax), 
    nltmax = as.numeric(nltmax), nlhmax = as.numeric(nltmax), waa = as.numeric(waa), 
    wba = as.numeric(wba), dhx = as.numeric(dhx), whx = as.numeric(whx), 
    ssx = as.numeric(ssx), wma = as.numeric(wma), lfa = as.numeric(lfa), 
    lma = as.numeric(lma), rha = as.numeric(rha), thrx = as.numeric(thrx), 
    pmx = as.numeric(pmx), defa = as.numeric(defa), bphx = as.numeric(bphx), 
    wbpx = as.numeric(wbpx), awx = as.numeric(awx), rbx = as.numeric(rbx), 
    rbbx = as.numeric(rbbx), glhx = as.numeric(glhx), stbx = as.numeric(stbx), 
    hbx = as.numeric(hbx), bbx = as.numeric(bbx), blba = as.numeric(blba), 
    lba = as.numeric(lba), bsa = as.numeric(bsa), blsa = as.numeric(blsa), 
    nbsa = as.numeric(nbsa), rsa = as.numeric(rsa), lsa = as.numeric(lsa), 
    shbx = as.numeric(shbx), shrx = as.numeric(shrx), srx = as.numeric(srx), 
    fsmx = as.numeric(fsmx), nbx = as.numeric(nbx), dpx = as.numeric(dpx), 
    rtdx = as.numeric(rtdx), rsdx = as.numeric(rsdx), gsdx = as.numeric(gsdx), 
    rtx = as.numeric(rtx))

recoding the variables

data$pc <- ifelse(data$pc == "rice", 1, 0)

# Crop establisment method
levels(data$cem)[levels(data$cem) == "trp"] <- 1
levels(data$cem)[levels(data$cem) == "TPR"] <- 1
levels(data$cem)[levels(data$cem) == "DSR"] <- 2
levels(data$cem)[levels(data$cem) == "dsr"] <- 2

# fym there are two type 0 and 1, raw data are recorded as no, yes, and value, if the value is 0 which mean 0 and if the value more than 0 which means 1 

data$fym <- ifelse(data$fym == "no", 0, 
                   ifelse(data$fym == "0", 0, 1
                   )
)

# vartype there are three type treditional varieties, modern varities and hybrid
data$vartype <- ifelse(data$vartype == "tv", 1,
                       ifelse(data$vartype == "mv", 2,
                              ifelse(data$vartype == "hyb", 3, NA
                              )
                       )
)


# wcp weed control management
levels(data$wcp)[levels(data$wcp) == "hand"] <- 1
levels(data$wcp)[levels(data$wcp) == "herb"] <- 2
levels(data$wcp)[levels(data$wcp) == "herb-hand"] <- 3


# Crop Status
levels(data$cs)[levels(data$cs) == "very poor"] <- 1
levels(data$cs)[levels(data$cs) == "poor"] <- 2
levels(data$cs)[levels(data$cs) == "average"] <- 3
levels(data$cs)[levels(data$cs) == "good"] <- 4
levels(data$cs)[levels(data$cs) == "very good"] <- 5

cluster the production situation

# clean the data
num.data <- apply(data[, -c(1,2)], 2, as.numeric)
num.data <- as.data.frame(as.matrix(num.data))
data <- cbind(data[1:2], num.data)
data <- data[,apply(data[, -c(1,2)], 2, var, na.rm = TRUE) != 0] # exclude the column with variation = 0
data <- data[complete.cases(data),] # exclude row which cantain NA
#==== cluster analysis of the production sitatuon of the survey data ====
start.PS <- "pc"
end.PS <- "fu"
start.col.PS <- match(start.PS, names(data))
end.col.PS <- match(end.PS, names(data))
PS.data <- data[, c(1, start.col.PS:end.col.PS)]

# transform all variable to numeric type

# wss <- (nrow(PS.data)-1)* sum(apply(PS.data, 2, var))
# for (i in 2:15) wss[i] <- sum(kmeans(PS.data, 
#                                      centers=i)$withinss)
# plot(1:15, wss, type="b", xlab="Number of Clusters",
#      ylab="Within groups sum of squares")


# distance matrix
dist.PS <- daisy(PS.data[-1])
## Warning in daisy(PS.data[-1]): binary variable(s) 1, 2, 6 treated as
## interval scaled
cluster.PS <- hclust(dist.PS, method = "average")

dendro.PS <- as.dendrogram(cluster.PS)
plot(dendro.PS, center = T, nodePar = list(lab.cex = 0.6,
                                          lab.col = "black", pch = NA),
     main = "Dendrogram for Production situation")

# draw retangles
rect.hclust(tree = cluster.PS, k=2, border = c("red", "blue"))

#number of members in each cluster
PS.no <- cutree(cluster.PS, k = 2)

# cophenitic correlation
rcluster.PS <- cophenetic(cluster.PS)
#cor(dist.PS, rcluster.PS)

pheatmap(t(PS.data[-1]), cluster_rows = FALSE, clustering_distance_rows = "daisy",clustering_method = "average" )

data <- cbind(data, PS.no)
data$PS.no <- as.factor(data$PS.no)

data %>% ggplot(aes(x = PS.no)) + geom_histogram() + facet_grid(~country)

subset the data by country

name.country <- levels(data$country)

by.country.data <- list()

for(i in 1:length(name.country)){
  temp <- data %>% 
    filter(country == name.country[i])
  by.country.data[[i]] <- temp
}
# The profile of PS no 
clus.PS.data <- data[, -c(1, 17:56)]

m.PS.data <- melt(clus.PS.data)
## Using country, PS.no as id variables
name.variable <- levels(m.PS.data$variable)

name.PS.no <- levels(m.PS.data$PS.no)
data %>% ggplot(aes(x = PS.no)) + geom_histogram() + facet_grid(~country)

subset the injuires profiles

#head(data)
# select only the variables related to the injury profiles

start.IP <- "dhx"
end.IP <- "rtx"
start.col.IP <- match(start.IP, names(data))
end.col.IP <- match(end.IP, names(data))

IP.data <- data[start.col.IP:end.col.IP]

IP.data <- IP.data[ ,apply(IP.data, 2, var, na.rm = TRUE) != 0] # exclude the column with variation = 0

groups <- paste(data$country, data$PS.no, sep = "_")

IP.data <- cbind(groups, IP.data)

IP.data[is.na(IP.data)] <- 0

trts <- as.vector(unique(IP.data$groups))

Co-occurence analysis iof injurioes profile

#=====co_occurrence_pairwise.R====
results <- matrix(nrow = 0, ncol = 7)
options(warnings = -1)
for(a in 1:length(trts)){
  #pull the first element from the vector of treatments
  trt.temp <- trts[a]
  #subset the dataset for those treatments
  temp <- subset(IP.data, groups == trt.temp)
  
  #in this case the community data started at column 6, so the loop for co-occurrence has to start at that point
  for(b in 2:(dim(temp)[2]-1)){
    #every species will be compared to every other species, so there has to be another loop that iterates down the rest of the columns
    for(c in (b+1):(dim(temp)[2])){
      
      #summing the abundances of species of the columns that will be compared
      species1.ab <- sum(temp[,b])
      species2.ab <- sum(temp[,c])
      #if the column is all 0's no co-occurrence will be performed
      if(species1.ab >1 & species2.ab >1){
        test <- cor.test(temp[,b], temp[,c], method = "spearman", na.action = na.rm, exact = FALSE)
        # There are warnings when setting exact = TRUE because of ties from the output of Spearman's correlation
        # stackoverflow.com/questions/10711395/spear-man-correlation and ties
        # It would be still valid if the data is not normally distributed.
        rho <- test$estimate
        p.value <- test$p.value
      }
      
      if(species1.ab <=1 | species2.ab <= 1){
        rho<-0
        p.value<-1
      } 
      
      new.row <- c(trts[a], names(temp)[b], names(temp)[c], rho, p.value, species1.ab, species2.ab)
      results <- rbind(results, new.row)            
      
    }
    
  }
  print(a/length(trts))
  
}
## [1] 0.1111111
## [1] 0.2222222
## [1] 0.3333333
## [1] 0.4444444
## [1] 0.5555556
## [1] 0.6666667
## [1] 0.7777778
## [1] 0.8888889
## [1] 1
results<-data.frame(data.matrix(results))
names(results)<-c("trt","taxa1","taxa2","rho","p.value","ab1","ab2")

#making sure certain variables are factors
results$trt <- as.factor(results$trt)
results$taxa1 <- as.character(as.factor(results$taxa1))
results$taxa2 <- as.character(as.factor(results$taxa2))
results$rho <- as.numeric(as.character(results$rho))
results$p.value <- as.numeric(as.character(results$p.value))
results$ab1 <- as.numeric(as.character(results$ab1))
results$ab2 <- as.numeric(as.character(results$ab2))

str(results)
## 'data.frame':    2484 obs. of  7 variables:
##  $ trt    : Factor w/ 9 levels "IDN_1","IDN_2",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ taxa1  : chr  "dhx" "dhx" "dhx" "dhx" ...
##  $ taxa2  : chr  "whx" "ssx" "defa" "bphx" ...
##  $ rho    : num  0.0652 -0.1227 0.1899 -0.117 -0.5074 ...
##  $ p.value: num  0.689188 0.45061 0.240542 0.472163 0.000832 ...
##  $ ab1    : num  1307 1307 1307 1307 1307 ...
##  $ ab2    : num  2089 109 1941 1286 84 ...
head(results)
##     trt taxa1 taxa2         rho      p.value  ab1  ab2
## 1 PHL_1   dhx   whx  0.06524012 0.6891879742 1307 2089
## 2 PHL_1   dhx   ssx -0.12272044 0.4506097948 1307  109
## 3 PHL_1   dhx  defa  0.18989475 0.2405423584 1307 1941
## 4 PHL_1   dhx  bphx -0.11699678 0.4721633481 1307 1286
## 5 PHL_1   dhx  wbpx -0.50743562 0.0008316712 1307   84
## 6 PHL_1   dhx   awx  0.00000000 1.0000000000 1307    1
results$newrho <- ifelse(abs(results$rho) > 0.2, results$rho, 0)
#  IDN1 <- results %>% filter(trt == "IDN_1")
#  IDN2 <- results %>% filter(trt == "IDN_2")
#  IND1 <- results %>% filter(trt == "IND_1")
#  IND2 <- results %>% filter(trt == "IND_2")
#  THA1 <- results %>% filter(trt == "THA_1")
#  THA2 <- results %>% filter(trt == "THA_2")
#  VNM1 <- results %>% filter(trt == "VNM_1")
#  VNM2 <- results %>% filter(trt == "VNM_1")
#  PHL1 <- results %>% filter(trt == "PHL_1")
results.by.group <- list()
name.groups <- c("IDN_1", "IDN_2", "IND_1", "IND_2", "THA_1", "THA_2", "VNM_1", "VNM_2", "PHL_1") #sort(as.vector(unique(groups)))
for(i in 1: length(name.groups)){
  
  results.by.group[[i]] <- subset(results, trt == name.groups[i])
}

# head(results_sub.by.group[[1]][,2:3]) # get the list

g  <- list()
for(i in 1:length(name.groups)){
  g[[i]] <- graph.edgelist(as.matrix(results.by.group[[i]][,2:3]), directed = FALSE)
#== adjust layout
l <- layout.circle(g[[i]])
#== adjust vertices

V(g[[i]])$color <- "tomato"
V(g[[i]])$frame.color <- "gray40"
V(g[[i]])$shape <- "circle"
V(g[[i]])$size <- 25
V(g[[i]])$label.color <- "white"
V(g[[i]])$label.font <- 2
V(g[[i]])$label.family <- "Helvetica"
V(g[[i]])$label.cex <- 0.7

#== adjust the edge
E(g[[i]])$weight <- as.matrix(results.by.group[[i]][, 8])
E(g[[i]])$width <- 1 + E(g[[i]])$weight*5

col <- c("firebrick3", "forestgreen")
colc <- cut(results.by.group[[i]]$newrho, breaks = c(-1, 0, 1), include.lowest = TRUE)
#levels(colc)[1] <- "0"
E(g[[i]])$color <- col[colc]

g[[i]] <- delete_edges(g[[i]], which(E(g[[i]])$weight == 0))
}
adj.mat <- list()
i <- 2
for(i in 1:length(g)){
  
adj.mat[[i]] <- as_adjacency_matrix(g[[i]], attr = "weight")

adj.mat[[i]][is.na(adj.mat[[i]])] <- 0

assign(paste("adj.mat.", name.groups[i], sep = ""), adj.mat[[i]])

}
## Note: method with signature 'CsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
##  target signature 'dgCMatrix#ngCMatrix#missing#numeric'.
##  "Matrix#nsparseMatrix#missing#replValue" would also be valid
## Note: method with signature 'CsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
##  target signature 'dgCMatrix#nsCMatrix#missing#numeric'.
##  "Matrix#nsparseMatrix#missing#replValue" would also be valid

Now we will have nine adjacency matrix based on Spearman’s correlation of Indonesia PS1, Indonesia PS2, India PS1, India PS2, Thailand PS1, Thailand PS2, Vietnam PS1, Vietnam PS2, and Philippines PS1

#adj.mat.IDN_1, adj.mat.IDN_2, adj.mat.IND_1, adj.matIND_2, adj.mat.THA_1, adj.mat.THA2, adj.mat.VNM_1, adj.mat.VNM2, adj.mat.PHL_1
pair.IP.list <- matrix(nrow = 0, ncol = 2)
  for(b in 2:(dim(IP.data)[2]-1)){
    for(c in (b+1):(dim(IP.data)[2])){
      new.row <- c(names(IP.data)[b], names(IP.data)[c])
      pair.IP.list <- rbind(pair.IP.list, new.row)          
    }
  }
pair.IP.list <- data.frame(data.matrix(pair.IP.list))
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276
## --> row.names NOT used
names(pair.IP.list)<-c("IPvar1","IPvar2")

### Calculation of differential correlations Fisher’s z-test was used to identify significant differences between 2 correlations, based on its stringency test and its provision of conservative estimates of true differential correlations among molecules between 2 experimental conditions in the omics data. To test whether the 2 correlation coefficients were significantly different, we first transformed correlation coefficients for each of the 2 conditions, rA and rB, into \(Z_A\) and \(Z_B\), respectively. The Fisher’s transformation of coefficient \(r_A\) is defined by: \(Z = \frac{1}{2}\log{\frac{1+r_A}{1-r_A}}}\)

Similarly,

Fisher’s z-transformation of r is defined as

\(Z = \frac{Z_A - Z_B}{\sqrt{\frac{1}{n_A} + \frac{1}{n_B}}}\)

we transform coefficient rB into ZB. Differences between the two cor- relations can be tested using the following Eq. (1)nA and nB represent the sample size for each of the conditions for each biomolecule pair (Fukushima et al., 2011; Fukushima et al., 2012; Morgenthal et al., 2006). The Z value has an approximately Gaussian distribution under the null hypothesis that the population correlations are equal. Controlling the FDR described by Benjamini and Hochberg (1995) is a stringent and practical method in multiple testing problems. However, while it assumes all tests to be independent, this is not the case for correlation tests. We therefore used the local false-discovery rate (fdr) derived from the fdrtool package (Strimmer, 2008).

#cortest.normal(adj.mat[[1]], adj.mat[[2]], n1=1000, n2=1000)
diff.corr   <- function(data1,n1, data2, n2, N){
    cc1 <- data1
    cc2 <- data2
    ccc1 <- as.vector(cc1[lower.tri(cc1)])
    ccc2 <- as.vector(cc2[lower.tri(cc2)])
    n1 <- n1
    n2 <- n2
    n <- N
    N <- n * (n - 1)/2
    p1 <- rep(1, N)
    p2 <- rep(1, N)
    pdiff <- rep(1, N)
    diff <- rep(1, N)
    mol.names <- rownames(adj.mat[[1]])
    p1 <- cor2.test(n1, ccc1)
    p2 <- cor2.test(n2, ccc2)
    pdiff <- compcorr(n1, ccc1, n2, ccc2)$pval
    diff <- ccc1 - ccc2
    myindex <- which((lower.tri(cc1)) == TRUE, arr.ind = TRUE)
    mol.names1 <- mol.names[myindex[, 2]]
    mol.names2 <- mol.names[myindex[, 1]]
    fin.ind <- pdiff < 0.05
    res <- cbind(mol.names1[fin.ind], mol.names2[fin.ind], ccc1[fin.ind], p1[fin.ind], ccc2[fin.ind], p2[fin.ind], pdiff[fin.ind], diff[fin.ind])
    res <- as.data.frame(res)
    names(res) <- c("var1", "var2", "r1", "p1", "r2", "p2", "p.difference", "difr")
    str(res)
    res$var1 <- as.character(res$var1)
    res$var2 <- as.character(res$var2)
    res$r1 <- as.numeric(as.character(res$r1))
    res$p1 <- as.numeric(as.character(res$p1))
    res$r2 <- as.numeric(as.character(res$r2))
    res$p2  <- as.numeric(as.character(res$p2))
    res$p.difference <- as.numeric(as.character(res$p.difference))
    res$difr <- as.numeric(as.character(res$difr))
    return(res)
}

Differential Network analysis of Indonesia

The pairs of injury profiles that are different significantly between PS1 and PS2 of Indonesia

  res <- diff.corr(data1 = adj.mat.IDN_1, 24, data2 = adj.mat.IDN_2, 24, 100)
## 'data.frame':    2 obs. of  8 variables:
##  $ var1        : Factor w/ 2 levels "rbx","shbx": 1 2
##  $ var2        : Factor w/ 2 levels "rbbx","shrx": 1 2
##  $ r1          : Factor w/ 1 level "0": 1 1
##  $ p1          : Factor w/ 1 level "1": 1 1
##  $ r2          : Factor w/ 2 levels "0.618016617766193",..: 2 1
##  $ p2          : Factor w/ 2 levels "0.000975682010393041",..: 1 2
##  $ p.difference: Factor w/ 2 levels "0.0163508079151282",..: 1 2
##  $ difr        : Factor w/ 2 levels "-0.618016617766193",..: 2 1
  diff_comb <- left_join(pair.IP.list, res[, -c(3:7)], by = c("IPvar1" = "var1", "IPvar2" = "var2"))
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
  diff_comb[is.na(diff_comb)] <- 0
  res
##   var1 var2 r1 p1        r2          p2 p.difference       difr
## 1  rbx rbbx  0  1 0.6297257 0.000975682   0.01635081 -0.6297257
## 2 shbx shrx  0  1 0.6180166 0.001289351   0.01934238 -0.6180166
  par(mfrow = c(1,3))
com_g <- graph.edgelist(as.matrix(diff_comb[1:2]), directed = FALSE)
#== adjust vertices

V(com_g)$color <- "tomato"
V(com_g)$frame.color <- "gray40"
V(com_g)$shape <- "circle"
V(com_g)$size <- 25
V(com_g)$label.color <- "white"
V(com_g)$label.font <- 2
V(com_g)$label.family <- "Helvetica"
V(com_g)$label.cex <- 0.7

#== adjust the edge
E(com_g)$weight <- as.matrix(diff_comb[, 3])
E(com_g)$width <- 1 + E(com_g)$weight*5

col <- c("firebrick3", "forestgreen")
colc <- cut(diff_comb$difr, breaks = c(-1, 0, 1), include.lowest = TRUE)
#levels(colc)[1] <- "0"
E(com_g)$color <- col[colc]

com_g <- delete_edges(com_g, which(E(com_g)$weight == 0))
plot(g[[1]], layout = layout.circle, main = "Indonesia at PS1")
plot(com_g, layout = layout.circle, main = "Differential network \n between PS1 and PS2 ")
plot(g[[2]], layout = layout.circle, main = "Indonesia at PS2")

Differential Network analysis of India

The pairs of injury profiles that are different significantly between PS1 and PS2 of India

  res2 <- diff.corr(data1 = adj.mat.IND_1, 24, data2 = adj.mat.IND_2, 24, 104)
## 'data.frame':    3 obs. of  8 variables:
##  $ var1        : Factor w/ 3 levels "bphx","fsmx",..: 1 3 2
##  $ var2        : Factor w/ 2 levels "fsmx","nbx": 1 2 2
##  $ r1          : Factor w/ 3 levels "-0.638965963104735",..: 2 1 3
##  $ p1          : Factor w/ 3 levels "0.000337521735415126",..: 3 2 1
##  $ r2          : Factor w/ 2 levels "0","0.549593752955177": 2 1 1
##  $ p2          : Factor w/ 2 levels "0.00540368298804368",..: 1 2 2
##  $ p.difference: Factor w/ 3 levels "0.00855182620104511",..: 3 2 1
##  $ difr        : Factor w/ 3 levels "-0.549593752955177",..: 1 2 3
  diff_comb <- left_join(pair.IP.list, res2[, -c(3:7)], by = c("IPvar1" = "var1", "IPvar2" = "var2"))
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
  diff_comb[is.na(diff_comb)] <- 0
  res2
##   var1 var2         r1           p1        r2          p2 p.difference
## 1 bphx fsmx  0.0000000 1.0000000000 0.5495938 0.005403683  0.045295727
## 2 shbx  nbx -0.6389660 0.0007767876 0.0000000 1.000000000  0.014242243
## 3 fsmx  nbx  0.6704014 0.0003375217 0.0000000 1.000000000  0.008551826
##         difr
## 1 -0.5495938
## 2 -0.6389660
## 3  0.6704014
   par(mfrow = c(1,3))
    com_g <- graph.edgelist(as.matrix(diff_comb[1:2]), directed = FALSE)
#== adjust vertices

V(com_g)$color <- "tomato"
V(com_g)$frame.color <- "gray40"
V(com_g)$shape <- "circle"
V(com_g)$size <- 25
V(com_g)$label.color <- "white"
V(com_g)$label.font <- 2
V(com_g)$label.family <- "Helvetica"
V(com_g)$label.cex <- 0.7

#== adjust the edge
E(com_g)$weight <- as.matrix(diff_comb[, 3])
E(com_g)$width <- 1 + E(com_g)$weight*5

col <- c("firebrick3", "forestgreen")
colc <- cut(diff_comb$difr, breaks = c(-1, 0, 1), include.lowest = TRUE)
#levels(colc)[1] <- "0"
E(com_g)$color <- col[colc]

com_g <- delete_edges(com_g, which(E(com_g)$weight == 0))
plot(g[[3]], layout = layout.circle, main = "India at PS1")
plot(com_g, layout = layout.circle, main = "Differential network \n between PS1 and PS2 ")
plot(g[[4]], layout = layout.circle, main = "INdia at PS2")

Differential Network analysis of Thailand

The pairs of injury profiles that are different significantly between PS1 and PS2 of Thaialnd

  res2 <- diff.corr(data1 = adj.mat.THA_1, 24, data2 = adj.mat.THA_2, 24, 105)
## 'data.frame':    1 obs. of  8 variables:
##  $ var1        : Factor w/ 1 level "lba": 1
##  $ var2        : Factor w/ 1 level "shbx": 1
##  $ r1          : Factor w/ 1 level "-0.33839819964214": 1
##  $ p1          : Factor w/ 1 level "0.105783730450418": 1
##  $ r2          : Factor w/ 1 level "0.293145674356346": 1
##  $ p2          : Factor w/ 1 level "0.16446383583604": 1
##  $ p.difference: Factor w/ 1 level "0.033994983692041": 1
##  $ difr        : Factor w/ 1 level "-0.631543873998486": 1
  diff_comb <- left_join(pair.IP.list, res2[, -c(3:7)], by = c("IPvar1" = "var1", "IPvar2" = "var2"))
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
  diff_comb[is.na(diff_comb)] <- 0
  res2
##   var1 var2         r1        p1        r2        p2 p.difference
## 1  lba shbx -0.3383982 0.1057837 0.2931457 0.1644638   0.03399498
##         difr
## 1 -0.6315439
   par(mfrow = c(1,3))
com_g <- graph.edgelist(as.matrix(diff_comb[1:2]), directed = FALSE)
#== adjust vertices

V(com_g)$color <- "tomato"
V(com_g)$frame.color <- "gray40"
V(com_g)$shape <- "circle"
V(com_g)$size <- 25
V(com_g)$label.color <- "white"
V(com_g)$label.font <- 2
V(com_g)$label.family <- "Helvetica"
V(com_g)$label.cex <- 0.7

#== adjust the edge
E(com_g)$weight <- as.matrix(diff_comb[, 3])
E(com_g)$width <- 1 + E(com_g)$weight*5

col <- c("firebrick3", "forestgreen")
colc <- cut(diff_comb$difr, breaks = c(-1, 0, 1), include.lowest = TRUE)
#levels(colc)[1] <- "0"
E(com_g)$color <- col[colc]

com_g <- delete_edges(com_g, which(E(com_g)$weight == 0))
plot(g[[5]], layout = layout.circle, main = "Thaialnd at PS1")
plot(com_g, layout = layout.circle, main = "Differential network \n between PS1 and PS2")
plot(g[[6]], layout = layout.circle, main = " Thailand at PS2")

Differential Network analysis of Vietnam

The pairs of injury profiles that are different significantly between PS1 and PS2 of Vitnam

  res2 <- diff.corr(data1 = adj.mat.VNM_1, 24, data2 = adj.mat.VNM_2, 24, 105)
## 'data.frame':    6 obs. of  8 variables:
##  $ var1        : Factor w/ 6 levels "bsa","defa","rbx",..: 6 2 5 3 1 4
##  $ var2        : Factor w/ 5 levels "nbsa","nbx","rsdx",..: 3 5 1 4 2 2
##  $ r1          : Factor w/ 6 levels "-0.556443296678694",..: 6 5 1 3 2 4
##  $ p1          : Factor w/ 6 levels "0.000925099920001324",..: 1 3 5 6 2 4
##  $ r2          : Factor w/ 3 levels "-0.205469676672969",..: 2 3 3 1 3 3
##  $ p2          : Factor w/ 3 levels "0.215469830227002",..: 1 3 3 2 3 3
##  $ p.difference: Factor w/ 6 levels "0.00102582911299476",..: 1 4 6 2 3 5
##  $ difr        : Factor w/ 6 levels "-0.556443296678694",..: 6 4 1 5 2 3
  diff_comb <- left_join(pair.IP.list, res2[, -c(3:7)], by = c("IPvar1" = "var1", "IPvar2" = "var2"))
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
## Warning in left_join_impl(x, y, by$x, by$y): joining character vector and
## factor, coercing into character vector
  diff_comb[is.na(diff_comb)] <- 0
  res2
##   var1 var2         r1           p1         r2        p2 p.difference
## 1  whx rsdx  0.6319098 0.0009250999 -0.2623898 0.2154698  0.001025829
## 2 defa shrx  0.5822719 0.0028332224  0.0000000 1.0000000  0.030948013
## 3 wbpx nbsa -0.5564433 0.0047449481  0.0000000 1.0000000  0.041964526
## 4  rbx shbx  0.4834204 0.0167035186 -0.2054697 0.3354541  0.017102023
## 5  bsa  nbx -0.5874529 0.0025417535  0.0000000 1.0000000  0.029016848
## 6 shrx  nbx  0.5775572 0.0031226151  0.0000000 1.0000000  0.032783763
##         difr
## 1  0.8942996
## 2  0.5822719
## 3 -0.5564433
## 4  0.6888900
## 5 -0.5874529
## 6  0.5775572
  par(mfrow = c(1,3))
    com_g <- graph.edgelist(as.matrix(diff_comb[1:2]), directed = FALSE)
#== adjust vertices

V(com_g)$color <- "tomato"
V(com_g)$frame.color <- "gray40"
V(com_g)$shape <- "circle"
V(com_g)$size <- 25
V(com_g)$label.color <- "white"
V(com_g)$label.font <- 2
V(com_g)$label.family <- "Helvetica"
V(com_g)$label.cex <- 0.7

#== adjust the edge
E(com_g)$weight <- as.matrix(diff_comb[, 3])
E(com_g)$width <- 1 + E(com_g)$weight*5

col <- c("firebrick3", "forestgreen")
colc <- cut(diff_comb$difr, breaks = c(-1, 0, 1), include.lowest = TRUE)
#levels(colc)[1] <- "0"
E(com_g)$color <- col[colc]

com_g <- delete_edges(com_g, which(E(com_g)$weight == 0))

plot(g[[7]], layout = layout.circle , main = "Vietnam at PS1")
plot(com_g, layout = layout.circle, main = "Differential network \n between PS1 and PS2 ")
plot(g[[8]], layout = layout.circle, main = "Vietnam at PS2")

See how the yield data are distributed

#head(data)
# select only the variables related to the injury profiles
start.IP <- "dhx"
end.IP <- "rtx"
start.col.IP <- match(start.IP, names(data))
end.col.IP <- match(end.IP, names(data))

IP.data <- data[start.col.IP:end.col.IP]

IP.data <- IP.data[ ,apply(IP.data, 2, var, na.rm = TRUE) != 0] # exclude the column with variation = 0
yield <- data$yield

IP.Y.data <- cbind(yield, IP.data)

IP.Y.data[is.na(IP.Y.data)] <- 0
#hist(IP.Y.data$yield)
# from this histogram 
# I would group the data into six groups of yields.
ylevel <- ifelse(IP.Y.data$yield <= 3, "Y3", 
                           ifelse( 3 < IP.Y.data$yield & IP.Y.data$yield <= 4, "Y4",
                                  ifelse(4 < IP.Y.data$yield & IP.Y.data$yield <= 5, "Y5",
                                         ifelse(5 < IP.Y.data$yield & IP.Y.data$yield <= 6, "Y6",
                                                ifelse(6 < IP.Y.data$yield & IP.Y.data$yield <= 7, "Y7", "Y8"
                                                       )))))
# y3 <- subset(IP.Y.data, yield <= 3)
# y4 <- subset(IP.Y.data, 3 < yield | yield >= 4 )
# y5 <- subset(IP.Y.data, 4 < yield | yield >= 5 )
# y6 <- subset(IP.Y.data, 5 < yield | yield >= 6 )
# y7 <- subset(IP.Y.data, 6 < yield | yield >= 7 )
# y8 <- subset(IP.Y.data, 7 < yield | yield >= 8 )
IP.Y.data <- cbind(ylevel, IP.Y.data)
IP.Y.data$ylevel <- as.factor(IP.Y.data$ylevel)
ylevels <- levels(IP.Y.data$ylevel)
IP.Y.data$activity.level <- rowSums(IP.Y.data[-1])

ggplot(IP.Y.data, aes(x= as.factor(ylevel), y= activity.level)) + 
  geom_boxplot() +
  stat_summary(fun.y = median, geom = "line", aes(group = 1)) +
  stat_summary(func.y = median, geom = "point")

ggplot(IP.Y.data, aes(x= yield, y= activity.level)) + geom_point()

IP.Y.data.new <- IP.Y.data[, !names(IP.Y.data) %in% c("yield", "activity.level")]
results <- matrix(nrow = 0, ncol = 7)
options(warnings = -1)
for(a in 1:length(ylevels)){
  #pull the first element from the vector of treatments
  y.temp <- ylevels[a]
  #subset the dataset for those treatments
  temp <- subset(IP.Y.data.new, ylevel == y.temp)
  
  #in this case the community data started at column 6, so the loop for co-occurrence has to start at that point
  for(b in 2:(dim(temp)[2]-1)){
    #every species will be compared to every other species, so there has to be another loop that iterates down the rest of the columns
    for(c in (b+1):(dim(temp)[2])){
      
      #summing the abundances of species of the columns that will be compared
      species1.ab <- sum(temp[, b])
      species2.ab <- sum(temp[, c])
      #if the column is all 0's no co-occurrence will be performed
      if(species1.ab >1 & species2.ab >1){
        test <- cor.test(temp[,b], temp[,c], method = "spearman", na.action = na.rm, exact = FALSE)
        # There are warnings when setting exact = TRUE because of ties from the output of Spearman's correlation
        # stackoverflow.com/questions/10711395/spear-man-correlation and ties
        # It would be still valid if the data is not normally distributed.
        rho <- test$estimate
        p.value <- test$p.value
      }
      
      if(species1.ab <=1 | species2.ab <= 1){
        rho<-0
        p.value<-1
      } 
      
      new.row <- c(ylevels[a], names(temp)[b], names(temp)[c], rho, p.value, species1.ab, species2.ab)
      results <- rbind(results, new.row)            
      
    }
    
  }
  print(a/length(ylevels))
  
}
## [1] 0.1666667
## [1] 0.3333333
## [1] 0.5
## [1] 0.6666667
## [1] 0.8333333
## [1] 1
results <- data.frame(data.matrix(results))
names(results) <- c("ylevel","taxa1","taxa2","rho","p.value","ab1","ab2")

#making sure certain variables are factors
results$ylevel <- as.factor(results$ylevel)
results$taxa1 <- as.character(as.factor(results$taxa1))
results$taxa2 <- as.character(as.factor(results$taxa2))
results$rho <- as.numeric(as.character(results$rho))
results$p.value <- as.numeric(as.character(results$p.value))
results$ab1 <- as.numeric(as.character(results$ab1))
results$ab2 <- as.numeric(as.character(results$ab2))

str(results)
## 'data.frame':    1656 obs. of  7 variables:
##  $ ylevel : Factor w/ 6 levels "Y3","Y4","Y5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ taxa1  : chr  "dhx" "dhx" "dhx" "dhx" ...
##  $ taxa2  : chr  "whx" "ssx" "defa" "bphx" ...
##  $ rho    : num  0.1425 0.3869 0.2996 0.0659 -0.1179 ...
##  $ p.value: num  0.5165 0.0682 0.1648 0.7652 0.5921 ...
##  $ ab1    : num  282 282 282 282 282 282 282 282 282 282 ...
##  $ ab2    : num  1243 208 52 1109 83 ...
head(results)
##   ylevel taxa1 taxa2         rho    p.value ab1  ab2
## 1     Y3   dhx   whx  0.14253716 0.51647489 282 1243
## 2     Y3   dhx   ssx  0.38687783 0.06819797 282  208
## 3     Y3   dhx  defa  0.29964515 0.16481128 282   52
## 4     Y3   dhx  bphx  0.06588643 0.76518142 282 1109
## 5     Y3   dhx  wbpx -0.11789000 0.59214825 282   83
## 6     Y3   dhx   awx  0.00000000 1.00000000 282    0
 # sub_y.results <- subset(results, as.numeric(as.character(abs(rho))) > 0.25)
  sub_y.results <- results %>% filter(p.value < 0.05)

  sub_ylevels.results <- list()
  
  for(i in 1: length(ylevels)){
  
  sub_ylevels.results[[i]] <- subset(sub_y.results, ylevel == ylevels[i])
  }
# head(results_sub.by.group[[1]][,2:3]) # get the list
par(mfrow = c(2, 3))
  
 ynet  <- list()

for(i in 1:length(ylevels)){
  
  ynet[[i]] <- graph.edgelist(as.matrix(sub_ylevels.results[[i]][, 2:3]), directed = FALSE)
#== adjust layout

  l <- layout.circle(ynet[[i]])
#== adjust vertices
  V(ynet[[i]])$color <- "tomato"
  V(ynet[[i]])$frame.color <- "gray40"
  V(ynet[[i]])$shape <- "circle"
  V(ynet[[i]])$size <- 25
  V(ynet[[i]])$label.color <- "white"
  V(ynet[[i]])$label.font <- 2
  V(ynet[[i]])$label.family <- "Helvetica"
  V(ynet[[i]])$label.cex <- 0.7

#== adjust the edge
  E(ynet[[i]])$weight <- as.matrix(sub_ylevels.results[[i]][, 4])
  
  E(ynet[[i]])$width <- 1 + E(ynet[[i]])$weight*5
  
  col <- c("firebrick3", "forestgreen")
  
  colc <- cut(sub_ylevels.results[[i]]$rho, breaks = c(-1, 0, 1), include.lowest = TRUE)
  
  E(ynet[[i]])$color <- col[colc]

#== plot network model
  plot(ynet[[i]], layout = l * 1.0, main = paste( "network model of", ylevels[i]))
}

network.value <- list()
i <-1

for(i in 1:length(ynet)){
  
  E(ynet[[i]])$weight <- abs(as.matrix(sub_ylevels.results[[i]][, 4]))
  
  network.value[[i]] <- data.frame(
  ylevel = ylevels[i],
  node = V(ynet[[i]])$name,
  strength = graph.strength(ynet[[i]]),
  betweenness = betweenness(ynet[[i]]),
  closeness = closeness(ynet[[i]]),
  eigenvector = evcent(ynet[[i]])$vector,
  clusterCoef = transitivity(ynet[[i]], type=c("local"))
  )
}
  network.value <- as.data.frame(do.call("rbind", network.value))
  row.names(network.value ) <- NULL
  p1 <- ggplot(network.value, aes(x = ylevel, y = closeness)) + geom_boxplot()
  p2 <- ggplot(network.value, aes(x = ylevel, y = betweenness)) + geom_boxplot()
  p3 <- ggplot(network.value, aes(x = ylevel, y = strength)) + geom_boxplot()
  p4 <- ggplot(network.value, aes(x = ylevel, y = eigenvector)) + geom_boxplot()
  p5 <- ggplot(network.value, aes(x = ylevel, y = clusterCoef)) + geom_boxplot()
  
p6 <- ggplot(IP.Y.data, aes(x= as.factor(ylevel), y= activity.level)) + 
 # geom_boxplot() +
  stat_summary(fun.y = median, geom = "line", aes(group = 1)) 
  #stat_summary(func.y = median, geom = "point")

grid.arrange(p1, p2, p3, p4, p5, p6)
## Warning: Removed 13 rows containing non-finite values (stat_boxplot).